rm(list=ls())
rm(list=ls())
library(latex2exp)

## ######################
## Design of simulations
## ######################
sample_size <- c(1500, 4500, 9000)
vio_list <- c(0, 0.05, 0.3)
choice_divergences <- c(rep(0, 3), seq(0, 1, 1/3))
outcome_divergences <- c(seq(0, 1, 1/3), rep(0, 3))
divergences <- seq(1:7)

design <- expand.grid(sample_size, vio_list, divergences)
colnames(design) <- c("sample_size", "vio", "divergence")

sim_size <- 500
sim_total <- sim_size*nrow(design)

# ##########################
# Collecting the Estimates
# ##########################
n_s <- nrow(design)
final_out <- readRDS("simulation_data/HPC_0724_big_clean.rds")
sim_total <- length(final_out)
pica_1_out <- list()
pica_2_out <- list()
pica_2_test_out <- matrix(NA, nrow = sim_total, ncol = 6)
pica_2_after_1_out <- list()
pica_2_after_1_out_boot <- list()
ind <- c()
for(i in 1:sim_total){
  # cat(i)
  # simulation
  
  # load data 
  out_i <- final_out[[i]]
  
  if(is.null(out_i) == FALSE){
    
    ind[i] <- final_out[[i]]$design_num
    
    # PICA-1
    pica_1_out[[i]] <- out_i$bounds.out_1
    
    # PICA-2
    pica_2_out[[i]] <- out_i$out_2_tab
    
    # PICA-2-test
    pica_2_test_out[i, 1:6] <- out_i$test_pica2
    
    # PICA-2-after-PICA-1
    pica_2_after_1_out[[i]] <- out_i$out_2_1_tab
    
  }
}

# ##########################
# Collecting True 
# ##########################
true_effect_mat <- cbind("C" = rep(c(1,2,3), 3), 
                         "treat" = c(rep(1, 6), rep(2,3)), 
                         "ctrl" = c(rep(2, 3), rep(3, 6)))

ACTE_bound_list <- ACTE_2_point_list <- list()
for(i in 1:7){
  choice_divergence <- choice_divergences[i]
  outcome_divergence <- outcome_divergences[i]
  
  true_effect <- readRDS(file.path("simulation_data", paste0("true_out_CD_", round(choice_divergence,2),
                                                  "OD_", round(outcome_divergence, 2), ".rds")))
  
  true_effect_base <- c()
  for(i_use in 1:nrow(true_effect_mat)){
    true_effect_base[i_use] <- true_effect$EY_ca[true_effect_mat[i_use, 1], true_effect_mat[i_use, 2]] - 
      true_effect$EY_ca[true_effect_mat[i_use, 1], true_effect_mat[i_use, 3]]
  }
  
  ACTE_point <- cbind(true_effect_mat, true_effect_base)
  ACTE_bound <- true_effect$bounds$effects
  
  prop_1 <- true_effect$p_c_simpop[-1]/sum(true_effect$p_c_simpop[-1])
  prop_2 <- true_effect$p_c_simpop[-2]/sum(true_effect$p_c_simpop[-2])
  
  true_bounds.boot.new_13_m1 <- c(t(ACTE_bound[c(5, 6), -c(1,2,3)]) %*% prop_1)
  true_bounds.boot.new_23_m2 <- c(t(ACTE_bound[c(7, 9), -c(1,2,3)]) %*% prop_2)
  true_effect_mat_new <- cbind("C" = c(-1, -2), "treat" = c(1, 2), "ctrl" = c(3,3))
  add <- cbind(true_effect_mat_new, rbind(true_bounds.boot.new_13_m1, true_bounds.boot.new_23_m2))
  colnames(add) <- colnames(ACTE_bound)
  ACTE_bound <- rbind(ACTE_bound, add)
  rownames(ACTE_bound) <- NULL
  
  ACTE_bound_list[[i]] <- ACTE_bound
  
  C     <- c(0, 0, 1, 2, -1, -2)
  treat <- c(1, 2, 1, 2, 1, 2)
  ctrl  <- c(3, 3, 3, 3, 3, 3)
  pica2_effect_mat <- cbind(C, treat, ctrl)
  true_effect_base2 <- c()
  ATE_13 <- sum((true_effect$EY_ca[, 1] - true_effect$EY_ca[, 3])*true_effect$p_c_simpop)
  ATE_23 <- sum((true_effect$EY_ca[, 2] - true_effect$EY_ca[, 3])*true_effect$p_c_simpop)
  ACTE_13_C1   <- true_effect$EY_ca[1, 1] - true_effect$EY_ca[1, 3]
  ACTE_23_C2   <- true_effect$EY_ca[2, 2] - true_effect$EY_ca[2, 3]
  ACTE_13_C_m1 <- sum((true_effect$EY_ca[-1, 1] - true_effect$EY_ca[-1, 3])*true_effect$p_c_simpop[-1])/sum(true_effect$p_c_simpop[-1])
  ACTE_23_C_m2 <- sum((true_effect$EY_ca[-2, 2] - true_effect$EY_ca[-2, 3])*true_effect$p_c_simpop[-2])/sum(true_effect$p_c_simpop[-2])
  ACTE_2_point <- cbind(pica2_effect_mat, c(ATE_13, ATE_23, ACTE_13_C1, ACTE_23_C2, ACTE_13_C_m1, ACTE_23_C_m2))
  
  ACTE_2_point_list[[i]] <- ACTE_2_point
}


# ###########################
# 1. PICA-2
# ###########################
pica_2_cilo <- pica_2_cihigh <- matrix(NA, nrow = sim_total, ncol = 6)
pica_2_point <- matrix(NA, nrow = sim_total, ncol = 6)
for(x in 1:sim_total){
  if(is.null(pica_2_out[[x]]) == FALSE){
    pica_2_point[x, 1:6]  <- pica_2_out[[x]][, 4]
    pica_2_cilo[x, 1:6]   <- pica_2_out[[x]][, 7]
    pica_2_cihigh[x, 1:6] <- pica_2_out[[x]][, 8]
  }
}
effect_name <- c("ATE:(1,3)", "ATE:(2,3)", "ACTE:(1,3) when C = 1", "ACTE:(2,3) when C = 2", 
                 TeX(r'(ACTE:(1,3) when C$\neq$1)'), TeX(r'(ACTE:(2,3) when C$\neq$2)'))
pch_use  <- c(rep(19, 3), rep(15, 3), rep(17, 3))
col_use  <- c(rep("black", 3), rep("red", 3), rep("blue", 3))
sample_size <- rep(c(1500, 4500, 9000), 3)
n_s <- nrow(design)

# ###########################
# 2. PICA-1 after PICA-2
# ###########################
use_bound_id <- seq(from = 4, to = 11)
# Prepare 
n_est <- 8
pica_12_bias_lo <- pica_12_bias_high <- matrix(NA, nrow = sim_total, ncol = n_est)
pica_12_ci_cover <- matrix(NA, nrow = sim_total, ncol = n_est)
pica_12_ci_point_cover <- matrix(NA, nrow = sim_total, ncol = n_est)
pica_12_ci_cover_boot <- matrix(NA, nrow = sim_total, ncol = n_est)
for(x in 1:sim_total){
  
  if(is.na(ind[x]) == FALSE){
    
    div_use <- design[ind[x], 3]
    ACTE_bound_use <- ACTE_bound_list[[div_use]]
    
    if(is.null(pica_2_after_1_out[[x]]) == FALSE){
      pica_12_bias_lo[x, 1:n_est]   <- pica_2_after_1_out[[x]][, 4] - ACTE_bound_use[use_bound_id, 4]
      pica_12_bias_high[x, 1:n_est] <- pica_2_after_1_out[[x]][, 5] - ACTE_bound_use[use_bound_id, 5]
      
      pica_12_ci_cover[x, 1:n_est]   <- as.numeric(pica_2_after_1_out[[x]][, 6] <= ACTE_bound_use[use_bound_id, 4])*
        as.numeric(pica_2_after_1_out[[x]][, 7] >= ACTE_bound_use[use_bound_id, 5])
    }
  }
}

# ###########################
# 3. Original PICA-1 design 
# ###########################
use_bound_id <- seq(from = 4, to = 11)
# Prepare 
pica_1_bias_lo <- pica_1_bias_high <- matrix(NA, nrow = sim_total, ncol = n_est)
pica_1_ci_cover <- matrix(NA, nrow = sim_total, ncol = n_est)
for(x in 1:sim_total){
  
  if(is.na(ind[x]) == FALSE){
    
    div_use <- design[ind[x], 3]
    ACTE_bound_use <- ACTE_bound_list[[div_use]]
    
    if(is.null(pica_1_out[[x]]) == FALSE){
      pica_1_bias_lo[x, 1:n_est]   <- pica_1_out[[x]][use_bound_id, 4] - ACTE_bound_use[use_bound_id, 4]
      pica_1_bias_high[x, 1:n_est] <- pica_1_out[[x]][use_bound_id, 5] - ACTE_bound_use[use_bound_id, 5]
      
      pica_1_ci_cover[x, 1:n_est]   <- as.numeric(pica_1_out[[x]][use_bound_id, 6] <= ACTE_bound_use[use_bound_id, 4])*
        as.numeric(pica_1_out[[x]][use_bound_id, 7] >= ACTE_bound_use[use_bound_id, 5])
    }
  }
}

# ##########################
# CI length Comparison
# ##########################
# focus on 4 ACTEs
focus_num <- 4

pica_2_ci_l <- matrix(NA, nrow = sim_total, ncol = focus_num)
pica_1_ci_l <- matrix(NA, nrow = sim_total, ncol = focus_num)
pica_12_ci_l <- matrix(NA, nrow = sim_total, ncol = focus_num)
for(x in 1:sim_total){
  if(is.null(pica_2_out[[x]]) == FALSE){
    pica_2_ci_l[x, 1:focus_num]   <- pica_2_out[[x]][seq(from = 3, to = 6), 8] - pica_2_out[[x]][seq(from = 3, to = 6), 7]
  }
  if(is.null(pica_1_out[[x]]) == FALSE){
    pica_1_ci_l[x, 1:focus_num]   <- pica_1_out[[x]][c(4, 8, 10, 11), 7] - pica_1_out[[x]][c(4, 8, 10, 11), 6]
  }
  if(is.null(pica_2_after_1_out[[x]]) == FALSE){
    pica_12_ci_l[x, 1:focus_num]   <- pica_2_after_1_out[[x]][c(1, 5, 7, 8), 7] - pica_2_after_1_out[[x]][c(1, 5, 7, 8), 6]
  }
}
pica_12_ci_tab <- pica_1_ci_tab <- matrix(NA, nrow = focus_num, ncol = n_s)
pica_2_ci_tab <- matrix(NA, nrow = focus_num, ncol = n_s)
for(i in 1:focus_num){
  pica_2_ci_tab[i, 1:n_s] <- tapply(pica_2_ci_l[, i], ind, mean)
  pica_1_ci_tab[i, 1:n_s]   <- tapply(pica_1_ci_l[, i], ind, mean)
  pica_12_ci_tab[i, 1:n_s] <- tapply(pica_12_ci_l[, i], ind, mean)
}

## #################################
## Appendix B.1: When the Assumption Holds 
## #################################
diff_ci <- pica_2_ci_tab/pica_1_ci_tab
gain <- diff_ci[, design$vio == 0]
gain_design <- cbind(design[design$vio == 0,], "ratio" = apply(gain, 2, mean))
for(i in 1:7){
  gain_design$outcome_div[gain_design$divergence == i] <- outcome_divergences[i]
  gain_design$choice_div[gain_design$divergence == i] <- choice_divergences[i]
}

pdf("figures/Figure_B1.pdf", height = 3.75, width = 8)
par(mar = c(4, 4, 4, 2))
plot(seq(1:21), gain_design$ratio, pch = c(19, 16, 15), col = c("black", "red", "blue"), ylim = c(0, 0.6),
     xaxt = "n", ylab = "Proportions", xlab = "Simulation Settings",
     main = "Length of the PA Estimator's Confidence Interval \n as a Proportion of the Bounds' Confidence Interval")
for(i in 1:6){
  abline(v = 3.5 +(i-1)*3, lty = 2)
}
for(i in 1:7){
  Axis(side = 1, at = 2 + (i-1)*3, labels = paste0("OD = ",  sprintf("%.2f", round(outcome_divergences[i], 2)), 
                                                 "\nCD = ", sprintf("%.2f", round(choice_divergences[i], 2))),
       tick = 0, line = 0.5)
}
legend("topright", legend = c("sample size = 1500", "sample size = 4500", "sample size = 9000"), 
       pch = c(19, 16, 15), col = c("black", "red", "blue"), bg = "white")
dev.off()

## ##################################
## Appendix B.2: When the Assumption is Violated
## ##################################

diff_ci_12 <- pica_12_ci_tab/pica_1_ci_tab
loss <- diff_ci_12[, design$vio != 0]
loss_design <- cbind(design[design$vio != 0,], "ratio" = apply(loss, 2, mean))
for(i in 1:7){
  loss_design$outcome_div[loss_design$divergence == i] <- outcome_divergences[i]
  loss_design$choice_div[loss_design$divergence == i] <- choice_divergences[i]
}

# we focus on vio == 0.30
loss_design_use <- loss_design[loss_design$vio == 0.30, ]

pdf("figures/Figure_B2.pdf", height = 3.75, width = 8)
par(mar = c(4, 4, 4, 2))
plot(seq(1:21), loss_design_use$ratio, pch = c(19, 16, 15), col = c("black", "red", "blue"), ylim = c(0.95, 1.1),
     xaxt = "n", ylab = "Proportions", xlab = "Simulation Settings",
     main = "Length of the Bounds's Confidence Interval under PICA-2 \n as a Proportion of the one under the original PICA")
abline(h = 1, lwd = 2, lty = 2)
for(i in 1:6){
  abline(v = 3.5 +(i-1)*3, lty = 2)
}
for(i in 1:7){
  Axis(side = 1, at = 2 + (i-1)*3, labels = paste0("OD = ",  sprintf("%.2f", round(outcome_divergences[i], 2)), 
                                                   "\nCD = ", sprintf("%.2f", round(choice_divergences[i], 2))),
       tick = 0, line = 0.5)
}
legend("topright", legend = c("sample size = 1500", "sample size = 4500", "sample size = 9000"), 
       pch = c(19, 16, 15), col = c("black", "red", "blue"), bg = "white")
dev.off()


# ############################
# Appendix B.3
# ############################

## ########################
## Conditional Inference: conditional on acceptance (NEW: 09/29/2024)
## ########################
cover_pica2_c <- matrix(NA, nrow = 6, ncol = n_s)
for(i in 1:6){
  for(j in 1:n_s){
    reject <- as.numeric(pica_2_test_out[,3] <= 0.05)
    point_true_effect <- ACTE_2_point_list[[design$divergence[j]]][i, 4]
    ci_cover <- as.numeric(pica_2_cilo[ind == j, i] <= point_true_effect)*as.numeric(pica_2_cihigh[ind == j, i] >= point_true_effect)
    cover_pica2_c[i,j] <- mean(ci_cover[reject[ind == j] == 0], na.rm = TRUE)
  }
}

use_estimate2 <- seq(from = 3, to = 6)

cover_pica2_use_c <-  cover_pica2_c[use_estimate2, ]
ci_000 <- apply(cover_pica2_use_c[, which(design$vio == 0)], 2, mean)

# Figure B3
pdf("figures/Figure_B3.pdf", height = 3.75, width = 8)
par(mar = c(4, 4, 4, 2))
plot(seq(1:21), ci_000, pch = c(19, 16, 15), col = c("black", "red", "blue"), ylim = c(0.8, 1),
     xaxt = "n", ylab = "Proportions", xlab = "Simulation Settings",
     main = "Coverage: Confidence Interval of the PICA-2 estimator \n after the placebo test")
abline(h = 0.95, lty = 2)
for(i in 1:6){
  abline(v = 3.5 +(i-1)*3, lty = 2)
}
for(i in 1:7){
  Axis(side = 1, at = 2 + (i-1)*3, labels = paste0("OD = ",  sprintf("%.2f", round(outcome_divergences[i], 2)), 
                                                   "\nCD = ", sprintf("%.2f", round(choice_divergences[i], 2))),
       tick = 0, line = 0.5)
}
legend("bottomright", legend = c("sample size = 1500", "sample size = 4500", "sample size = 9000"), 
       pch = c(19, 16, 15), col = c("black", "red", "blue"), bg = "white")
dev.off()


## ########################
## Conditional Inference: conditional on rejection (NEW: 09/29/2024)
## ########################
use_estimate <- c(1, 5, 7, 8)
cover_pica12_c <- matrix(NA, nrow = n_est, ncol = n_s)
cover_point_pica12_c <- matrix(NA, nrow = n_est, ncol = n_s)
cover_pica12_boot_c <- matrix(NA, nrow = n_est, ncol = n_s)
for(i in 1:n_est){
  
  reject <- as.numeric(pica_2_test_out[,3] <= 0.05)
  # reject <- rep(1, length(pica_2_test_out[,3]))
  
  cover_pica12_c[i, 1:n_s] <- tapply(pica_12_ci_cover[reject == 1, i], ind[reject == 1], mean)
  cover_point_pica12_c[i, 1:n_s] <- tapply(pica_12_ci_point_cover[reject == 1, i], ind[reject == 1], mean)
  cover_pica12_boot_c[i, 1:n_s] <- tapply(pica_12_ci_cover_boot[reject == 1, i], ind[reject == 1], mean)
}

cover_pica12_use_c <-  cover_pica12_c[use_estimate, ]

# columns are different simulation designs 
# rows are different estimands 
ci_030 <- apply(cover_pica12_use_c[, which(design$vio == 0.30)], 2, mean)

# Figure B4
pdf("figures/Figure_B4.pdf", height = 3.75, width = 8)
par(mar = c(4, 4, 4, 2))
plot(seq(1:21), ci_030, pch = c(19, 16, 15), col = c("black", "red", "blue"), ylim = c(0.8, 1),
     xaxt = "n", ylab = "Proportions", xlab = "Simulation Settings",
     main = "Coverage: Confidence Interval of the PICA bound estimator \n after the placebo test")
abline(h = 0.95, lty = 2)
for(i in 1:6){
  abline(v = 3.5 +(i-1)*3, lty = 2)
}
for(i in 1:7){
  Axis(side = 1, at = 2 + (i-1)*3, labels = paste0("OD = ",  sprintf("%.2f", round(outcome_divergences[i], 2)), 
                                                   "\nCD = ", sprintf("%.2f", round(choice_divergences[i], 2))),
       tick = 0, line = 0.5)
}
legend("bottomright", legend = c("sample size = 1500", "sample size = 4500", "sample size = 9000"), 
       pch = c(19, 16, 15), col = c("black", "red", "blue"), bg = "white")
dev.off()
